package com.ruoyi.salesman.utils;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Utility for extracting the plain text of Microsoft Word documents with Apache POI.
 *
 * <p>Both the legacy binary format ({@code .doc}) and the OOXML format ({@code .docx})
 * are supported; the format is chosen from the file extension only.
 */
public class ReadDocUtil {

	/** Extension of legacy binary Word files, read with {@link WordExtractor}. */
	private static final String DOC_EXTENSION = ".doc";

	/** Extension of OOXML Word files, read with {@link XWPFWordExtractor}. */
	private static final String DOCX_EXTENSION = ".docx";

	/**
	 * Reads the text content of the Word document at {@code path}.
	 *
	 * <p>The extension is matched case-insensitively. This method is best-effort:
	 * when the path is {@code null}, is not a Word file, or the document cannot be
	 * read, the problem is reported on the console and an empty string is returned.
	 *
	 * @param path file system path of the document; may be {@code null}
	 * @return the extracted text, or {@code ""} if nothing could be extracted
	 * @throws IOException declared for compatibility with existing callers; failures
	 *                     are currently reported and swallowed rather than thrown
	 */
	public static String readDoc(String path) throws IOException {
		String result = "";
		// Pick the extractor from the file extension (doc vs. docx).
		try {
			if (hasExtension(path, DOC_EXTENSION)) {
				result = readLegacyDoc(path);
			} else if (hasExtension(path, DOCX_EXTENSION)) {
				result = readDocx(path);
			} else {
				System.out.println("此文件不是word文件");
			}
		} catch (Exception e) {
			// NOTE(review): kept as best-effort because existing callers may rely on
			// getting "" back on failure; consider propagating as IOException instead.
			e.printStackTrace();
		}
		return result;
	}

	/**
	 * Tells whether {@code path} ends with {@code extension}, ignoring case.
	 * A {@code null} path, or one shorter than the extension, never matches.
	 */
	private static boolean hasExtension(String path, String extension) {
		if (path == null) {
			return false;
		}
		// regionMatches returns false for a negative offset, so short paths are safe.
		int offset = path.length() - extension.length();
		return path.regionMatches(true, offset, extension, 0, extension.length());
	}

	/**
	 * Extracts the text of a binary {@code .doc} file.
	 * The input stream is closed before returning, whether or not extraction succeeds.
	 */
	private static String readLegacyDoc(String path) throws IOException {
		try (InputStream in = new FileInputStream(path)) {
			WordExtractor extractor = new WordExtractor(in);
			return extractor.getText();
		}
	}

	/**
	 * Extracts the text of an OOXML {@code .docx} file.
	 *
	 * <p>Declared as {@code throws Exception} because the POI extractor constructor
	 * declares checked exception types that this file does not import.
	 */
	private static String readDocx(String path) throws Exception {
		OPCPackage opcPackage = POIXMLDocument.openPackage(path);
		try {
			POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
			return extractor.getText();
		} finally {
			// The package is opened read/write; per the POI documentation close()
			// would save it back to disk, whereas revert() releases the file
			// without writing anything. This is a read-only operation, so revert.
			opcPackage.revert();
		}
	}

	/**
	 * Manual smoke test: prints the text of a Word document.
	 *
	 * @param args optional; {@code args[0]} is the document path. When absent, a
	 *             developer-machine sample path is used.
	 */
	public static void main(String[] args) {

		String filename = args.length > 0
				? args[0]
				: "C:\\Users\\Administrator\\Documents\\WeChat Files\\wxid_byzclfap93th21\\FileStorage\\File\\2022-11\\隐私政策.doc";
		String textContent = null;
		try {
			textContent = readDoc(filename);
		} catch (IOException e) {
			e.printStackTrace();
		}
		System.out.println(textContent);

	}
}
